package com.xian.scala.wc

import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment}
import org.apache.flink.api.scala._

/** Batch word count over a text file using the Flink DataSet API.
  *
  * Reads a text file line by line, splits each line on single spaces,
  * counts how often each non-empty token occurs, and prints the
  * `(word, count)` pairs.
  */
object MyWC2 {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "j:/a.txt"

  /** Entry point.
    *
    * @param args optional command-line arguments; `args(0)`, when present,
    *             is the path of the text file to read. Without arguments
    *             the job falls back to [[DefaultInputPath]].
    */
  def main(args: Array[String]): Unit = {
    // Obtain the execution environment.
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment

    // Read the text file; the first CLI argument overrides the default path.
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
    val lines: DataSet[String] = env.readTextFile(inputPath)

    // `import org.apache.flink.api.scala._` must be in scope here,
    // otherwise this transformation does not compile.
    // Leading or repeated spaces make `split(" ")` emit empty strings;
    // drop them so "" is not counted as a word.
    val words: DataSet[String] =
      lines.flatMap(line => line.split(" ").filter(_.nonEmpty))

    // Pair every word with an initial count of 1.
    val wordOnes: DataSet[(String, Int)] = words.map(word => (word, 1))

    // Group by the word (tuple field 0), then sum the counts (tuple field 1).
    val wordCounts: DataSet[(String, Int)] = wordOnes.groupBy(0).sum(1)

    // Print the resulting (word, count) pairs.
    wordCounts.print()
  }

}
